* Setup: disable output paging, move to the project folder, load the data,
* and create the variables used by the Step 1 regressions.
set more off
cd "/Users/hongxiu/Dropbox/IEAbta/simulation/JAERE submission/"
use "cc_data.dta", clear

* Detailed summary statistics for the three main inputs
* (the format option reports them in each variable's display format).
summarize GDP totalGHG DARACCcost2010, detail format

*********************Step 1: estimate alpha and beta********************
* Result holders, initialised to 0 and filled country by country in the
* regression loop that follows.
foreach result in alpha_GHG beta_GHG r2 r2adj fvalue {
    generate `result' = 0
}
* Squared emissions: the quadratic regressor.
generate totalGHG_sq = totalGHG^2
* Convert GDP from million $ to $.
replace GDP = GDP*10^6

* For each of the 194 country codes, regress GDP on emissions and squared
* emissions without an intercept, using only that country's observations:
*     GDP = alpha*totalGHG + beta*totalGHG_sq
* The fit is stored as reg<code>, and the coefficients and fit statistics are
* written back onto that country's rows.
forvalues c = 1/194 {
    regress GDP totalGHG totalGHG_sq if countrycode2 == `c', noconstant
    estimates store reg`c'
    replace alpha_GHG = _b[totalGHG]    if countrycode2 == `c'
    replace beta_GHG  = _b[totalGHG_sq] if countrycode2 == `c'
    replace r2        = e(r2)           if countrycode2 == `c'
    replace r2adj     = e(r2_a)         if countrycode2 == `c'
    replace fvalue    = e(F)            if countrycode2 == `c'
}

* Keep only countries with a positive linear and a negative quadratic
* coefficient. Rows still holding the initial 0 are dropped as well.
drop if beta_GHG >= 0 | alpha_GHG <= 0


* Table B.4: Regressions of GDP on GHG emissions (for a sample of countries)
* Country codes used: Argentina = 7, Germany = 47, Thailand = 172.
* Each stored fit is restored, replayed, and collected by eststo for
* tabulation (eststo/esttab are presumably from the estout package).
foreach c in 7 47 172 {
    estimates restore reg`c'
    eststo: regress
}
* Export command, left disabled:
*esttab reg7 reg47 reg172 using "reg.tex",  se scalars(N_g r2 )  b(a3) replace depvars title(Regressions of GDP on GHG emissions\label{append_reg})

******************************Step 2: Estimate vulnerability**************************************
/*
World emissions in 2010:
  CO2 only : 33615389 kt (about 3.36*10^7)
  total GHG: 33615389+7515150+2859834+1015443 = 45005816
Unused alternative measure (cost as a share of GDP):
gen cccost_GDP = DARACCcost2010*10^6/GDP
*/
local world_co2 33615389
local world_ghg 45005816

keep countrycode countrycode2 countryname year GDP totalGHG DARACCcost2010 alpha_GHG beta_GHG

* Vulnerability = climate cost (scaled by 10^6) divided by squared world
* emissions, once with the CO2-only total and once with the all-GHG total.
generate vul     = DARACCcost2010*10^6/(`world_co2'^2)
generate vul_GHG = DARACCcost2010*10^6/(`world_ghg'^2)
summarize vul vul_GHG, detail

* Reduce the panel to one row per country: after sorting with the most
* recent year first, duplicates drop keeps the first row of each country code.
gsort countryname -year
duplicates drop countrycode2, force

* 10 observations were dropped due to missing vulnerability data.
drop if vul_GHG==0 | vul_GHG==.
save "temp.dta", replace


******************************Step 3: Cluster countries**************************************
* Group the countries into 10 clusters on (vulnerability, alpha, beta) and
* compute the cluster means that define the representative countries.
use "temp.dta", clear

* Scale up vulnerability and beta so that they are comparable in magnitude
* with alpha in the k-means clustering.
gen vul_GHG_scale = vul_GHG*10^12
gen beta_GHG_scale = beta_GHG*100

* k-means starts from k randomly chosen observations, so without a seed the
* clusters differ from run to run. The seed is fixed here to make this script
* reproducible. The parameters actually used in the paper are saved in
* "parameters.csv" and will not, in general, match the output for this seed.
cluster kmeans vul_GHG_scale alpha_GHG beta_GHG_scale, k(10) gen(clstGHG_full_scale) name(clstGHG_full_scale) keepcen iterate(100000) start(krandom(12345))

* Dissimilarity of every row to observations 1-10, used only to order rows
* within a cluster for inspection. These variables are dropped below.
cluster measures vul_GHG_scale alpha_GHG beta_GHG_scale , compare(1/10) gen(ds1 ds2 ds3 ds4 ds5 ds6 ds7 ds8 ds9 ds10)
sort clstGHG_full_scale ds1-ds10
tabstat vul_GHG alpha_GHG beta_GHG, by(clstGHG_full_scale) stat(mean) save

**********obtain group means
drop ds1-ds10
* keepcen appends the cluster centers as extra rows. Presumably those are the
* rows without a country code that are removed here - TODO confirm.
drop if countrycode2==.

* Re-establish the sort order explicitly rather than relying on the order
* left over after ds1-ds10 were dropped. The stable option keeps the existing
* within-cluster row order.
sort clstGHG_full_scale, stable
foreach v of varlist alpha_GHG beta_GHG DARACCcost2010 totalGHG {
   by clstGHG_full_scale: egen `v'_mean= mean(`v')
}

* Keep one row per cluster. De-duplicating on the cluster id, instead of on a
* float mean, guarantees exactly one row per cluster even if two clusters
* happened to share the same alpha mean.
duplicates drop clstGHG_full_scale, force


******************************Step 4: Rescale the vulnerability in a world of representative countries
* At this point the data hold one row per cluster. World emissions in the
* representative-country world are the sum of the cluster-mean emissions.
egen totalGHGtemp = total(totalGHG_mean)

* Vulnerability of each representative country, computed from the cluster-mean
* climate cost. The raw DARACCcost2010 on the surviving row belongs to
* whichever single country happened to be kept by the de-duplication, so it
* does not represent the cluster.
gen vul_GHG_head = DARACCcost2010_mean*10^6/(totalGHGtemp^2)

* Final parameter set: one (alpha, beta, vul) triple per representative country.
keep alpha_GHG_mean beta_GHG_mean vul_GHG_head
rename alpha_GHG_mean alpha
rename beta_GHG_mean beta
rename vul_GHG_head vul
* The end

